# Import required libraries
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
import plotly.colors
from datetime import datetime
pio.templates.default = 'plotly_white'
# Read data from CSV file
data = pd.read_csv('rfm_data.csv')
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | |
|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris |
data.shape
(1000, 6)
# Convert 'PurchaseDate' to datetime
data['PurchaseDate'] = pd.to_datetime(data['PurchaseDate'])
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | |
|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris |
# Calculate Recency
data['Recency'] = (datetime.now().date() - data['PurchaseDate'].dt.date).dt.days
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | |
|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 |
# Calculate Frequency
frequency_data = data.groupby('CustomerID')['OrderID'].count().reset_index()
frequency_data.rename(columns = {'OrderID' : 'Frequency'}, inplace = True)
data = data.merge(frequency_data, on = 'CustomerID', how = 'left')
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | |
|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 |
# data.to_csv('rfm_output.csv')
# Calculate Monetary Value
monetary_data = data.groupby('CustomerID')['TransactionAmount'].sum().reset_index()
monetary_data.rename(columns = {'TransactionAmount' : 'MonetaryValue'}, inplace = True)
data = data.merge(monetary_data, on = 'CustomerID', how = 'left')
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | MonetaryValue | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 | 943.31 |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 | 463.70 |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 | 80.28 |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 | 221.29 |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 | 739.56 |
# Define scoring criteria for each RFM value
recency_scores = [5, 4, 3, 2, 1] # Higher score for lower recency (more recent)
frequency_scores = [1, 2, 3, 4, 5] # Higher score for higher frequency
monetary_scores = [1, 2, 3, 4, 5] # Higher score for higher monetary value
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | MonetaryValue | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 | 943.31 |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 | 463.70 |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 | 80.28 |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 | 221.29 |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 | 739.56 |
# Calculate RFM scores
data['RecencyScore'] = pd.cut(data['Recency'], bins = 5, labels = recency_scores)
data['FrequencyScore'] = pd.cut(data['Frequency'], bins = 5, labels = frequency_scores)
data['MonetaryScore'] = pd.cut(data['MonetaryValue'], bins = 5, labels = monetary_scores)
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | MonetaryValue | RecencyScore | FrequencyScore | MonetaryScore | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 | 943.31 | 1 | 1 | 2 |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 | 463.70 | 1 | 1 | 1 |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 | 80.28 | 1 | 1 | 1 |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 | 221.29 | 1 | 1 | 1 |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 | 739.56 | 1 | 1 | 2 |
# Convert RFM scores to numeric type
data['RecencyScore'] = data['RecencyScore'].astype(int)
data['FrequencyScore'] = data['FrequencyScore'].astype(int)
data['MonetaryScore'] = data['MonetaryScore'].astype(int)
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | MonetaryValue | RecencyScore | FrequencyScore | MonetaryScore | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 | 943.31 | 1 | 1 | 2 |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 | 463.70 | 1 | 1 | 1 |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 | 80.28 | 1 | 1 | 1 |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 | 221.29 | 1 | 1 | 1 |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 | 739.56 | 1 | 1 | 2 |
# data.to_csv('rfm_output.csv')
# Calculate RFM score by combining the individual scores
data['RFM_Score'] = data['RecencyScore'] + data['FrequencyScore'] + data['MonetaryScore']
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | MonetaryValue | RecencyScore | FrequencyScore | MonetaryScore | RFM_Score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 | 943.31 | 1 | 1 | 2 | 4 |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 | 463.70 | 1 | 1 | 1 | 3 |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 | 80.28 | 1 | 1 | 1 | 3 |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 | 221.29 | 1 | 1 | 1 | 3 |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 | 739.56 | 1 | 1 | 2 | 4 |
# Create RFM segments based on RFM score
segment_labels = ['Low-Value', 'Mid-Value', 'High-Value']
data['Value Segment'] = pd.qcut(data['RFM_Score'], q = 3, labels = segment_labels)
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | MonetaryValue | RecencyScore | FrequencyScore | MonetaryScore | RFM_Score | Value Segment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 | 943.31 | 1 | 1 | 2 | 4 | Low-Value |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 | 463.70 | 1 | 1 | 1 | 3 | Low-Value |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 | 80.28 | 1 | 1 | 1 | 3 | Low-Value |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 | 221.29 | 1 | 1 | 1 | 3 | Low-Value |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 | 739.56 | 1 | 1 | 2 | 4 | Low-Value |
# data.to_csv('rfm_output.csv')
# RFM Segment Distribution
segment_counts = data['Value Segment'].value_counts().reset_index()
segment_counts.columns = ['Value Segment', 'Count']
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | MonetaryValue | RecencyScore | FrequencyScore | MonetaryScore | RFM_Score | Value Segment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 | 943.31 | 1 | 1 | 2 | 4 | Low-Value |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 | 463.70 | 1 | 1 | 1 | 3 | Low-Value |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 | 80.28 | 1 | 1 | 1 | 3 | Low-Value |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 | 221.29 | 1 | 1 | 1 | 3 | Low-Value |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 | 739.56 | 1 | 1 | 2 | 4 | Low-Value |
# Define the pastel colour pallete
pastel_colors = px.colors.qualitative.Pastel
# Create the bar chart with pastel colors
fig_segment_dist = px.bar(segment_counts, x = 'Value Segment', y = 'Count', color = 'Value Segment',
color_discrete_sequence = pastel_colors, title = 'RFM Value Segment Distribution')
# Update the layout
fig_segment_dist.update_layout(xaxis_title = 'RFM Value Segment', yaxis_title = 'Count', showlegend = False)
# Show the figure
fig_segment_dist.show()
# Create a new column for RFM customer segments
data['RFM Customer Segments'] = ''
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | MonetaryValue | RecencyScore | FrequencyScore | MonetaryScore | RFM_Score | Value Segment | RFM Customer Segments | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 | 943.31 | 1 | 1 | 2 | 4 | Low-Value | |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 | 463.70 | 1 | 1 | 1 | 3 | Low-Value | |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 | 80.28 | 1 | 1 | 1 | 3 | Low-Value | |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 | 221.29 | 1 | 1 | 1 | 3 | Low-Value | |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 | 739.56 | 1 | 1 | 2 | 4 | Low-Value |
# Assign RFM segments based on RFM score
data.loc[data['RFM_Score'] >= 9, 'RFM Customer Segments'] = 'Champions'
data.loc[(data['RFM_Score'] >= 6) & (data['RFM_Score'] < 9), 'RFM Customer Segments'] = 'Potential Loyalist'
data.loc[(data['RFM_Score'] >= 5) & (data['RFM_Score'] < 6), 'RFM Customer Segments'] = 'At Risk Customers'
data.loc[(data['RFM_Score'] >= 4) & (data['RFM_Score'] < 5), 'RFM Customer Segments'] = "Can't Lose"
data.loc[(data['RFM_Score'] >= 3) & (data['RFM_Score'] < 4), 'RFM Customer Segments'] = 'Lost'
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | MonetaryValue | RecencyScore | FrequencyScore | MonetaryScore | RFM_Score | Value Segment | RFM Customer Segments | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 | 943.31 | 1 | 1 | 2 | 4 | Low-Value | Can't Lose |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 | 463.70 | 1 | 1 | 1 | 3 | Low-Value | Lost |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 | 80.28 | 1 | 1 | 1 | 3 | Low-Value | Lost |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 | 221.29 | 1 | 1 | 1 | 3 | Low-Value | Lost |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 | 739.56 | 1 | 1 | 2 | 4 | Low-Value | Can't Lose |
# Create a column Segment Product Count
segment_product_counts = data.groupby(['Value Segment', 'RFM Customer Segments']).size().reset_index(name = 'Count')
segment_product_counts = segment_product_counts.sort_values('Count', ascending = False)
data.head()
| CustomerID | PurchaseDate | TransactionAmount | ProductInformation | OrderID | Location | Recency | Frequency | MonetaryValue | RecencyScore | FrequencyScore | MonetaryScore | RFM_Score | Value Segment | RFM Customer Segments | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8814 | 2023-04-11 | 943.31 | Product C | 890075 | Tokyo | 286 | 1 | 943.31 | 1 | 1 | 2 | 4 | Low-Value | Can't Lose |
| 1 | 2188 | 2023-04-11 | 463.70 | Product A | 176819 | London | 286 | 1 | 463.70 | 1 | 1 | 1 | 3 | Low-Value | Lost |
| 2 | 4608 | 2023-04-11 | 80.28 | Product A | 340062 | New York | 286 | 1 | 80.28 | 1 | 1 | 1 | 3 | Low-Value | Lost |
| 3 | 2559 | 2023-04-11 | 221.29 | Product A | 239145 | London | 286 | 1 | 221.29 | 1 | 1 | 1 | 3 | Low-Value | Lost |
| 4 | 9482 | 2023-04-11 | 739.56 | Product A | 194545 | Paris | 286 | 1 | 739.56 | 1 | 1 | 2 | 4 | Low-Value | Can't Lose |
# Create a treemap with pastel colors
fig_treemap_segment_product = px.treemap(segment_product_counts,
path = ['Value Segment', 'RFM Customer Segments'],
values = 'Count',
color = 'Value Segment', color_discrete_sequence = px.colors.qualitative.Pastel,
title = 'RFM Customer Segments by Value')
C:\Users\Mahesh S Valanju\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. C:\Users\Mahesh S Valanju\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
# Show the figure
fig_treemap_segment_product.show()
# Filter the data to include only the customers in the champions segment
champions_segment = data[data['RFM Customer Segments'] == 'Champions']
# Create the box plot
champions_segment_fig = go.Figure()
champions_segment_fig.add_trace(go.Box(y = champions_segment['RecencyScore'], name = 'Recency'))
champions_segment_fig.add_trace(go.Box(y = champions_segment['FrequencyScore'], name = 'Frequency'))
champions_segment_fig.add_trace(go.Box(y = champions_segment['MonetaryScore'], name = 'Monetary'))
# Update the layout
champions_segment_fig.update_layout(title = 'Distribution of RFM Values within Champions Segment',
yaxis_title = 'RFM Value', showlegend = True)
# Show the figure
champions_segment_fig.show()
# Correlation Matrix
correlation_matrix = champions_segment[['RecencyScore', 'FrequencyScore', 'MonetaryScore']].corr()
# Visualize the correlation matrix using a heatmap
fig_corr_heatmap = go.Figure(data = go.Heatmap(
z = correlation_matrix.values,
x = correlation_matrix.columns,
y = correlation_matrix.columns,
colorscale = 'RdBu',
colorbar = dict(title = 'Correlation')))
# Update layout
fig_corr_heatmap.update_layout(title = 'Correlation Matrix of RFM Values Within Champions Segment')
# Show the figure
fig_corr_heatmap.show()
# Define the pastel colour pallete
pastel_colors = plotly.colors.qualitative.Pastel
# Customer segment counts
segment_counts = data['RFM Customer Segments'].value_counts()
# Create a bar chart to compare segment counts
comparison_fig = go.Figure(data = [go.Bar(x = segment_counts.index, y = segment_counts.values,
marker = dict(color = pastel_colors))])
# Set the color of the champions segment as a different color
champions_color = 'rgb(158, 202, 225)'
comparison_fig.update_traces(marker_color = [champions_color if segment == 'Champions' else pastel_colors[i]
for i, segment in enumerate(segment_counts.index)],
marker_line_color = 'rgb(8, 48, 107)',
marker_line_width = 1.5, opacity = 0.6)
# Update the layout
comparison_fig.update_layout(title = 'Comparison of RFM Segments',
xaxis_title = 'RFM Segments',
yaxis_title = 'Number of Customers',
showlegend = False)
# Show the figure
comparison_fig.show()
# Calculate the average Recency, Frequency and Monetary scores for each segment
segment_scores = data.groupby('RFM Customer Segments')['RecencyScore', 'FrequencyScore', 'MonetaryScore'].mean().reset_index()
C:\Users\Mahesh S Valanju\AppData\Local\Temp\ipykernel_23128\56607270.py:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
# Create a grouped bar chart to compare segment scores
fig = go.Figure()
# Add bars for Recency score
fig.add_trace(go.Bar(x = segment_scores['RFM Customer Segments'],
y = segment_scores['RecencyScore'],
name = 'Recency Score',
marker_color = 'rgb(158, 202, 225)'))
# Add bars for Frequency score
fig.add_trace(go.Bar(x = segment_scores['RFM Customer Segments'],
y = segment_scores['FrequencyScore'],
name = 'Frequency Score',
marker_color = 'rgb(94, 158, 217)'))
# Add bars for Monetary score
fig.add_trace(go.Bar(x = segment_scores['RFM Customer Segments'],
y = segment_scores['MonetaryScore'],
name = 'Monetary Score',
marker_color = 'rgb(32, 102, 148)'))
# Update the layout
fig.update_layout(title = 'Comparison of RFM Segments based on Recency, Frequency and Monetary Scores',
xaxis_title = 'RFM Segments',
yaxis_title = 'Score',
barmode = 'group',
showlegend = True)
# Show the figure
fig.show()
# Import required libraries for dahboard
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.io as pio
import plotly.colors as pc
# Initialize the Dash app
app = dash.Dash(__name__)
# Define the app layout using Bootstrap components
app.layout = html.Div([
html.H1('RFM Analysis Dashboard', className = 'text-center mb-4'),
html.Div('Analyze Customer Segments Based on RFM Scores.', className = 'text-center mb-4'),
# Dropdown for selecting the chart
dcc.Dropdown(
id = 'chart-type-dropdown',
options = [
{'label' : 'RFM Value Segment Distribution', 'value' : 'segment_distribution'},
{'label' : 'Distribution of RFM Values within Customer Segment', 'value' : 'RFM_distribution'},
{'label' : 'Correlation Matrix of RFM Values within Champions Segment', 'value' : 'correlation_matrix'},
{'label' : 'Comparison of RFM Segments', 'value' : 'segment_comparison'},
{'label' : 'Comparison of RFM Segments based on Scores', 'value' : 'segment_scores'},
],
value = 'segment_distribution', # Default selection
className = 'mb-4',
),
# Graph container
dcc.Graph(id = 'rfm-chart', className = 'mb-4')
])
# Define callback to update selected chart
@app.callback(
Output('rfm-chart', 'figure'),
[Input('chart-type-dropdown', 'value')]
)
def update_chart(selected_chart_type):
if selected_chart_type == 'segment_distribution':
return fig_segment_dist
elif selected_chart_type == 'RFM_distribution':
return fig_treemap_segment_product
elif selected_chart_type == 'correlation_matrix':
return fig_corr_heatmap
elif selected_chart_type == 'segment_comparison':
return comparison_fig
elif selected_chart_type == 'segment_scores':
return fig
# Return a default chart if no valid selection
return fig_segment_dist
if __name__ == '__main__':
app.run_server(port=8052)